Load protein abundance data, pre-process them into a format suitable for network analysis, and clean the data by removing obvious outlier samples as well as proteins and samples with excessive numbers of missing entries.
The expression data is contained in the files:
These files contains several quality parametres. We have used grouped protein abundance to compare samples.
Files not share same proteins, this is one important point to discuss. One aporximation is consider missing protein with 0 abundance. Also it can be considered only the shared proetins.
library(WGCNA)
library(readxl)
library(clusterProfiler)
library(dplyr)
library(sva)
library(EnhancedVolcano)
library(limma)
library(ggVennDiagram)
library(org.Hs.eg.db)
library(DT)
library(igraph)
library(WGCNA)
datatable_jm<-function(x,column=NULL){
# if(is.na(column)){column<-0}
datatable(
x,
extensions = 'Buttons',
filter = list(
position = 'top', clear = T
),
options = list(dom = 'Blfrtip',buttons = list(list(extend = 'colvis')),
buttons = list('copy', 'print',
list(extend = 'collection',
buttons = c('csv', 'excel', 'pdf'),
text = 'Download')),
columnDefs = list(list(visible=FALSE, targets=column))))}data_A<-read_excel("/home/josep/Baixades/Resultats proteòmica(1)/ANDREU/1-9_A1_A2_A3__precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_A1vsA2vsA3.xlsx")
data_C<-read_excel("/home/josep/Baixades/Resultats proteòmica(1)/ANDREU/10-14_C1_C2_C3__precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_C1vsC2vsC3.xlsx")
data_AC<-read_excel("/home/josep/Baixades/Resultats proteòmica(1)/ANDREU/1-14_A1_A2_A3_C1_C2_C3_precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_AvsC.xlsx")
pheno<-read_excel("./nomenclatura.xlsx")
## comparacio exceles ####
data_C_abundance<-data_C[,grep("Abundances [(]Grouped[)]:",colnames(data_C),ignore.case = T)]
data_C_abundance<-data.frame(data_C_abundance)
colnames(data_C_abundance)<-pheno$`Nomenclatura unificada`[4:6]
rownames(data_C_abundance)<-data_C$Accession
data_A_abundance<-data_A[,grep("Abundances [(]Grouped[)]:",colnames(data_A),ignore.case = T)]
data_A_abundance<-data.frame(data_A_abundance)
rownames(data_A_abundance)<-data_A$Accession
colnames(data_A_abundance)<-pheno$`Nomenclatura unificada`[1:3]
# Ven diagramm de comuns ####
# Matriu amb A i C junts ####
data_abundance<-merge(data_C_abundance,data_A_abundance,by="row.names",all=T)
data_abundance_0<-data_abundance
data_abundance_0[is.na(data_abundance_0)] <- 0
dir.create("./DATA_IN")
dir.create("./RESULTATS")
dir.create("./RESULTATS/OBJECTES")
save(data_abundance_0,file ="./RESULTATS/OBJECTES/data_abundance_0" )
save(data_abundance,file ="./RESULTATS/OBJECTES/data_abundance" )
colnames(data_abundance)## [1] "Row.names" "aP-EV1" "aP-EV2" "aP-EV3" "PL-EV1" "PL-EV2"
## [7] "PL-EV3"
# Load the WGCNA package
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
#Read in the female liver data set
pheno<-read_excel("./nomenclatura.xlsx")
# Take a quick look at what is in the data set:
load("RESULTATS/OBJECTES/data_abundance_0")
datExpr0 = as.data.frame(data_abundance_0)
rownames(datExpr0) = datExpr0$Row.names
datExpr0<-datExpr0[,-1]
datExpr0<-t(datExpr0)The expression data set contains 6 samples. Note that each row corresponds to a protein and column to a sample.
We first check for proteins and samples with too many missing values:
gsg = goodSamplesGenes(datExpr0, verbose = 3);## Flagging genes and samples with too many missing values...
## ..step 1
## ..Excluding 16 genes from the calculation due to too many missing samples or zero variance.
## ..step 2
gsg$allOK## [1] FALSE
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(colnames(datExpr0)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}## Removing genes: P08670, P0CG39, P0DPH8
## P0DPH7, P11217, P12036, P14136, P42229, P49593, P62745, Q12840, Q5T0T0, Q5TID7, Q6A162, Q8TE73, Q9P2T1, Q9Y4G6
Next we cluster the samples (in contrast to clustering proteins that will come later) to see if there are any obvious outliers.
sampleTree = hclust(dist(datExpr0), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
# sizeGrWindow(12,9)
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)pheno<-read_excel("./nomenclatura.xlsx")
pheno$grup<-as.factor(c("NaCl","NaCl","NaCl","PR","PR","PR"))
# remove columns that hold information we do not need.
allTraits = data.frame(pheno)
# Form a data frame analogous to expression data that will hold the clinical traits.
nameSamples = rownames(datExpr0);
traitRows = match(nameSamples, allTraits$Nomenclatura.unificada);
datTraits = allTraits[traitRows,];
rownames(datTraits) = allTraits[traitRows, 1];
collectGarbage()We now have the expression data in the variable datExpr, and the corresponding clinical traits in the variable datTraits.
Before we continue with network construction and module detection, we visualize how the clinical traits relate to the sample dendrogram
# Re-cluster samples
sampleTree2 = hclust(dist(datExpr0), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(as.numeric(datTraits$grup), signed = FALSE);
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")Groups are very different (between other things, are not share proteins).
Data are re-normalized with combat.
datExpr0<-
ComBat(
t(datExpr0),batch = c(rep("ap",3),rep("PL",3)),
mod = NULL,
par.prior = TRUE,
prior.plots = FALSE,
mean.only = FALSE,
ref.batch = NULL,
BPPARAM = bpparam("SerialParam")
)## Found 726 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
datExpr0<-t(datExpr0)Before we continue with network construction and module detection, we visualize how the clinical traits relate to the sample dendrogram
# Re-cluster samples
sampleTree2 = hclust(dist(datExpr0), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(as.numeric(datTraits$grup), signed = FALSE);
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")Constructing a weighted gene network entails the choice of the soft thresholding power β to which co-expression similarity is raised to calculate adjacency [1]. The authors of [1] have proposed to choose the soft thresholding power based on the criterion of approximate scale-free topology. We refer the reader to that work for more details; here we illustrate the use of the function pickSoftThreshold that performs the analysis of network topology and aids the user in choosing a proper soft-thresholding power.
# Choose a set of soft-thresholding powers
powers = c(c(1:50), seq(from = 52, to=100, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr0,
networkType = "signed hybrid",
powerVector = powers, verbose = 5)## pickSoftThreshold: will use block size 1154.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 1154 of 1154
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.33900 0.479 0.2080 353.0 366.00 561.0
## 2 2 0.04390 0.130 -0.1220 247.0 241.00 441.0
## 3 3 0.00942 -0.051 -0.0673 192.0 180.00 370.0
## 4 4 0.09780 -0.152 0.1150 158.0 147.00 325.0
## 5 5 0.27000 -0.237 0.4280 136.0 123.00 292.0
## 6 6 0.39700 -0.285 0.6030 119.0 107.00 266.0
## 7 7 0.49100 -0.335 0.7230 106.0 95.00 245.0
## 8 8 0.54600 -0.366 0.7150 96.2 85.20 229.0
## 9 9 0.62100 -0.389 0.7630 88.1 78.00 215.0
## 10 10 0.63700 -0.403 0.7490 81.3 72.20 203.0
## 11 11 0.70400 -0.422 0.7740 75.6 66.50 192.0
## 12 12 0.69100 -0.436 0.7470 70.8 61.60 183.0
## 13 13 0.69100 -0.450 0.7120 66.5 57.20 175.0
## 14 14 0.69800 -0.457 0.7190 62.8 53.80 167.0
## 15 15 0.68000 -0.467 0.6820 59.5 50.50 160.0
## 16 16 0.70000 -0.478 0.6910 56.6 47.70 154.0
## 17 17 0.70700 -0.489 0.6890 54.0 45.10 148.0
## 18 18 0.72800 -0.492 0.7070 51.6 42.90 143.0
## 19 19 0.72400 -0.496 0.7050 49.4 41.30 138.0
## 20 20 0.72900 -0.501 0.7080 47.5 39.40 133.0
## 21 21 0.74500 -0.503 0.7250 45.7 37.80 129.0
## 22 22 0.75700 -0.505 0.7340 44.0 36.10 125.0
## 23 23 0.78200 -0.507 0.7550 42.5 34.60 122.0
## 24 24 0.78400 -0.509 0.7530 41.0 33.60 119.0
## 25 25 0.79300 -0.513 0.7600 39.7 32.90 116.0
## 26 26 0.77100 -0.516 0.7340 38.5 32.00 113.0
## 27 27 0.77000 -0.518 0.7280 37.3 31.20 110.0
## 28 28 0.75800 -0.520 0.7090 36.2 30.50 108.0
## 29 29 0.75000 -0.524 0.6950 35.2 29.70 105.0
## 30 30 0.73500 -0.529 0.6750 34.3 29.00 103.0
## 31 31 0.72700 -0.539 0.6640 33.4 28.40 101.0
## 32 32 0.73400 -0.542 0.6730 32.5 27.90 98.6
## 33 33 0.72400 -0.546 0.6570 31.7 27.40 96.6
## 34 34 0.70900 -0.548 0.6350 30.9 26.80 94.7
## 35 35 0.71100 -0.550 0.6370 30.2 26.10 93.0
## 36 36 0.69900 -0.554 0.6210 29.5 25.60 91.3
## 37 37 0.71700 -0.557 0.6420 28.9 24.90 89.7
## 38 38 0.70700 -0.558 0.6300 28.2 24.30 88.2
## 39 39 0.69900 -0.561 0.6190 27.6 23.80 86.7
## 40 40 0.71000 -0.564 0.6320 27.1 23.40 85.3
## 41 41 0.71200 -0.569 0.6330 26.5 23.00 83.9
## 42 42 0.72200 -0.568 0.6450 26.0 22.40 82.6
## 43 43 0.71700 -0.569 0.6390 25.5 21.90 81.3
## 44 44 0.73700 -0.571 0.6640 25.0 21.40 80.1
## 45 45 0.75000 -0.571 0.6800 24.5 21.00 78.9
## 46 46 0.75300 -0.572 0.6830 24.1 20.50 77.8
## 47 47 0.75000 -0.575 0.6790 23.7 20.10 76.8
## 48 48 0.74400 -0.580 0.6710 23.2 19.70 75.7
## 49 49 0.75100 -0.586 0.6800 22.8 19.40 74.8
## 50 50 0.75700 -0.592 0.6880 22.5 19.20 73.8
## 51 52 0.75400 -0.601 0.6850 21.7 18.40 72.0
## 52 54 0.75500 -0.605 0.6870 21.0 17.60 70.3
## 53 56 0.74700 -0.615 0.6780 20.4 16.90 68.7
## 54 58 0.74200 -0.620 0.6750 19.8 16.30 67.1
## 55 60 0.75500 -0.627 0.6930 19.2 15.80 65.7
## 56 62 0.75400 -0.633 0.6950 18.7 15.30 64.3
## 57 64 0.76500 -0.639 0.7100 18.2 14.80 63.0
## 58 66 0.76300 -0.643 0.7050 17.7 14.40 61.7
## 59 68 0.76400 -0.644 0.7070 17.3 14.00 60.5
## 60 70 0.75300 -0.650 0.6940 16.9 13.60 59.3
## 61 72 0.76400 -0.652 0.7100 16.5 13.20 58.2
## 62 74 0.77000 -0.654 0.7200 16.1 12.80 57.2
## 63 76 0.76500 -0.659 0.7140 15.7 12.50 56.2
## 64 78 0.77200 -0.665 0.7270 15.4 12.20 55.2
## 65 80 0.78000 -0.668 0.7400 15.0 11.90 54.3
## 66 82 0.78500 -0.668 0.7490 14.7 11.60 53.4
## 67 84 0.78900 -0.670 0.7570 14.4 11.30 52.6
## 68 86 0.78800 -0.673 0.7560 14.1 11.00 51.8
## 69 88 0.79900 -0.675 0.7740 13.8 10.80 51.0
## 70 90 0.80400 -0.679 0.7810 13.6 10.50 50.3
## 71 92 0.80400 -0.682 0.7810 13.3 10.20 49.6
## 72 94 0.79600 -0.685 0.7750 13.1 10.00 48.9
## 73 96 0.80200 -0.690 0.7840 12.8 9.81 48.2
## 74 98 0.81200 -0.694 0.8020 12.6 9.63 47.5
## 75 100 0.81000 -0.699 0.8040 12.4 9.45 46.9
# Plot the results:
sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding powerplot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")Constructing the gene network and identifying modules is now a simple function call:
##### ATTENTION: IF you get this error:
##### Error in (new("standardGeneric", .Data = function (x, y = NULL, use = "everything", :
##### unused arguments (weights.x = NULL, weights.y = NULL, cosine = FALSE)
##### do the following:
#in RStudio... from the "Session" pulldown, press restart R.
#library(WGCNA).
#Then run the blockwiseModules function again
library(WGCNA)
pw<-11
# net = blockwiseModules(datExpr0,
# power = pw,
# TOMType = "unsigned",
# minModuleSize = 30,
# reassignThreshold = 0,
# mergeCutHeight = 0.25,
# numericLabels = TRUE,
# pamRespectsDendro = FALSE,
# saveTOMs = TRUE,
# saveTOMFileBase = "TOM_0",
# verbose = 3)
# save(net,file="net")
load("net")We have chosen the soft thresholding power 11 , a relatively large minimum module size of 30, and a medium sensitivity (deepSplit=2) to cluster splitting. The parameter mergeCutHeight is the threshold for merging of modules. We have also instructed the function to return numeric, rather than color, labels for modules, and to save the Topological Overlap Matrix. The output of the function may seem somewhat cryptic, but it is easy to use. For example, net\(colors contains the module assignment, and net\)MEs contains the module eigengenes of the modules.
# table(net$colors)The dendrogram can be displayed together with the color assignment using the following code:
# open a graphics window
sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneathplotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)We note that if the user would like to change some of the tree cut, module membership, and module merging criteria, the package provides the function recutBlockwiseTrees that can apply modified criteria without having to recompute the network and the clustering dendrogram. This may save a sub-stantial amount of time. We now save the module assignment and module eigengene information necessary for subsequent analysis.
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
file = "FemaleLiver-02-networkConstruction-auto.RData")In this analysis we would like to identify modules that are significantly associated with the measured clinical traits. Since we already have a summary profile (eigengene) for each module, we simply correlate eigengenes with external traits and look for the most significant associations
# Define numbers of genes and samples
nGenes = ncol(datExpr0)
nSamples = nrow(datExpr0)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr0, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, as.numeric(datTraits$grup), use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)Since we have a moderately large number of modules and traits, a suitable graphical representation will help in reading the table. We color code each association by the correlation value:
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits)[4],
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))The analysis identifies the 2 significant module–trait associations. We will concentrate on GRUP as the trait of interest.
We quantify associations of individual genes with our trait of interest by defining Gene Significance GS as (the absolute value of) the correlation between the gene and the trait. For each module, we also define a quantitative measure of module membership MM as the correlation of the module eigengene and the gene expression profile. This allows us to quantify the similarity of all proteins on the array to every module
# Define variable weight containing the weight column of datTrait
grup = as.data.frame(as.numeric(datTraits$grup));
names(grup) = "grup"
# names (colors) of the modules
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr0, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")
geneTraitSignificance = as.data.frame(cor(datExpr0, grup, use = "p"))
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))
names(geneTraitSignificance) = paste("GS.", names(grup), sep="")
names(GSPvalue) = paste("p.GS.", names(grup), sep="")##3.c Intramodular analysis: identifying genes with high GS and MM
Using the GS and MM measures, we can identify genes that have a high significance for weight as well as high module membership in interesting modules. As an example, we look at the brown module that has the highest association with weight. We plot a scatterplot of Gene Significance vs. Module Membership in the significant modules
moduls_int<-gsub("ME","",rownames(moduleTraitPvalue)[moduleTraitPvalue<=0.05])
moduls_int_no_sig<-gsub("ME","",rownames(moduleTraitPvalue)[moduleTraitPvalue==max(moduleTraitPvalue)])
module = moduls_int[1]
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7)
par(mfrow = c(1,1))verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)module = moduls_int[2]
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7)
par(mfrow = c(1,1))verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)GS and MM are correlated, illustrating that proteins highly significantly associated with a trait are often also the most important (central) elements of modules associated with the trait. The reader is encouraged to try this code with other significance trait/module correlation.
We have found modules with high association with our trait of interest, and have identified their central players by the Module Membership measure. We now merge this statistical information with protein annotation.
# colnames(datExpr0)
# colnames(datExpr0)[moduleColors=="green"]gene<-colnames(datExpr0)[moduleColors==moduls_int[1]]
gene<-unlist(strsplit(gene,"\r\n"))
kegg<-enrichKEGG(gene,
organism = "hsa",
keyType = "uniprot",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
qvalueCutoff = 0.2,
use_internal_data = FALSE)
assign(paste0("kegg_",moduls_int[1]),kegg)
go <- enrichGO(gene = gene,
OrgDb = org.Hs.eg.db,
ont = "BP",keyType = "UNIPROT",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
assign(paste0("go_",moduls_int[1]),go)
gene<-colnames(datExpr0)[moduleColors==moduls_int[2]]
gene<-unlist(strsplit(gene,"\r\n"))
kegg<-enrichKEGG(gene,
organism = "hsa",
keyType = "uniprot",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
qvalueCutoff = 0.2,
use_internal_data = FALSE)
assign(paste0("kegg_",moduls_int[2]),kegg)
go <- enrichGO(gene = gene,
OrgDb = org.Hs.eg.db,
ont = "BP",keyType = "UNIPROT",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
assign(paste0("go_",moduls_int[2]),go)datatable(get(paste0("kegg_",moduls_int[1]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[1],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))datatable(get(paste0("go_",moduls_int[1]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[1],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))datatable(get(paste0("kegg_",moduls_int[2]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[2],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))datatable(get(paste0("go_",moduls_int[2]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[2],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create such network plots; Fig. 1 was created using the following code. This code can be executed only if the network was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If the networks were calculated using the block-wise approach, the user will need to modify this code to perform the visualization in each block separately. The modification is simple and we leave it as an exercise for the interested reader.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
dissTOM = 1-TOMsimilarityFromExpr(datExpr0, power = 9);## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7;
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA;
# Call the plot function
sizeGrWindow(9,9)TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")# Recalculate module eigengenes
MEs = moduleEigengenes(datExpr0, moduleColors)$eigengenes
# Isolate weight from the clinical traits
grup = as.data.frame(as.numeric(datTraits$grup ))
names(grup) = "grup"
# Add the weight to existing module eigengenes
MET = orderMEs(cbind(MEs, grup))
# Plot the relationships among the eigengenes and the trait
sizeGrWindow(5,7.5);
par(cex = 0.9)plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
= 90)The function produces a dendrogram of the eigengenes and trait(s), and a heatmap of their relationships. To split the dendrogram and heatmap plots, we can use the following code
# Plot the dendrogram
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
plotHeatmaps = FALSE)# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
plotDendrograms = FALSE, xLabelsAngle = 90)TOM <-1-dissTOM
modules = moduls_int
probes = colnames(datExpr0)
inModule = is.finite(match(moduleColors, modules));
modProbes = probes[inModule];
# modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
# cyt = exportNetworkToCytoscape(modTOM,
# edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
# nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
# weighted = TRUE,
# threshold = 0.02,
# nodeNames = modProbes,
# # altNodeNames = modGenes,
# nodeAttr = moduleColors[inModule])adj <- modTOM
adj[adj > 0.5] = 1
adj[adj != 1] = 0
network <- graph.adjacency(adj)
network <- simplify(network) # removes self-loops
results <- net
V(network)$color <- moduleColors[inModule]
V(network)$size<-6
par(mar=c(0,0,0,0))
# remove unconnected nodes
network <- delete.vertices(network, degree(network)==0)
V(network)$label.cex = 0.2
igraph::plot.igraph(network,
# mark.groups = T,
layout=layout.fruchterman.reingold(network),
edge.arrow.size = 0.1)modules = moduls_int[1]
probes = colnames(datExpr0)
inModule = is.finite(match(moduleColors, modules));
modProbes = probes[inModule];
# modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
adj <- modTOM
adj[adj > 0.3] = 1
adj[adj != 1] = 0
network <- graph.adjacency(adj)
network <- simplify(network) # removes self-loops
results <- net
V(network)$color <- moduleColors[inModule]
V(network)$size<-6
V(network)$label.cex = 0.2
par(mar=c(0,0,0,0))
# remove unconnected nodes
network <- delete.vertices(network, degree(network)==0)
igraph::plot.igraph(network,
# mark.groups = T,
layout=layout.fruchterman.reingold(network),
edge.arrow.size = 0.1)modules = moduls_int[2]
probes = colnames(datExpr0)
inModule = is.finite(match(moduleColors, modules));
modProbes = probes[inModule];
# modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
adj <- modTOM
adj[adj > 0.3] = 1
adj[adj != 1] = 0
network <- graph.adjacency(adj)
network <- simplify(network) # removes self-loops
results <- net
V(network)$label.cex = 0.2
V(network)$color <- moduleColors[inModule]
V(network)$size<-6
par(mar=c(0,0,0,0))
# remove unconnected nodes
network <- delete.vertices(network, degree(network)==0)
igraph::plot.igraph(network,
# mark.groups = T,
layout=layout.fruchterman.reingold(network),
edge.arrow.size = 0.1)data_AC$logFC<-
data_AC$`Abundance Ratio (log2): (Lisat Plaquetes) / (plasma_PRP)`
data_AC$adj.P.Val<-
data_AC$`Abundance Ratio Adj. P-Value: (Lisat Plaquetes) / (plasma_PRP)`
EnhancedVolcano(data_AC,
lab = data_AC$Accession,
# selectLab = genes_int,
labCol = 'black',
labFace = 'bold',
x = "logFC",
y = "adj.P.Val",
pCutoff = 0.05,
FCcutoff = 2,
ylim = c(0,11),
# xlim = c(-3,3),
pointSize = 1,
labSize = 2,
colAlpha = 2,
legendPosition = 'top',
legendLabSize = 8,
legendIconSize = 4.0,
drawConnectors = T,
widthConnectors = 0.75,
boxedLabels = T,
title = "DE proteins ",
subtitle = "Data from excel with samples A and C")groups<-as.factor(c(rep("ap",3),rep("PL",3)))
f = factor(groups)
design = model.matrix(~ 0 + f)
colnames(design) = gsub("f","",colnames(design))
head(datExpr0[,1:5])## A0A075B6I0 A0A075B6I9 A0A075B6K4 A0A075B6S5 A0A087WW87\r\nP01614
## aP-EV1 1329905 0.0 5170883 0.0 5707564
## aP-EV2 10606925 0.0 30584432 0.0 25283103
## aP-EV3 46057573 0.0 42908825 0.0 36806870
## PL-EV1 6298201 15581573.9 0 323688.0 17063633
## PL-EV2 5967341 997393.8 0 477809.4 16790989
## PL-EV3 50893998 50143614.2 0 1580733.5 25831799
data.fit = lmFit(t(datExpr0),design)
contrast.matrix = makeContrasts(ap-PL,levels=design)
data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)
table_DEG_all <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr")
table_DEG <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr",p.value = 0.05)
EnhancedVolcano(table_DEG_all,
lab = rownames(table_DEG_all),
# selectLab = genes_int,
labCol = 'black',
labFace = 'bold',
x = "logFC",
y = "adj.P.Val",
pCutoff = 0.05,
FCcutoff = 1,
ylim = c(0,6),
# xlim = c(-3,3),
pointSize = 1,
labSize = 2,
colAlpha = 2,
legendPosition = 'top',
legendLabSize = 8,
legendIconSize = 4.0,
drawConnectors = T,
widthConnectors = 0.75,
boxedLabels = T,
title = "DE proteins ",
subtitle = "Data calculated with limma (no log2)") table_DEG_all$logFC[sign(table_DEG_all$logFC)<0]<-
log2(1/abs(table_DEG_all$logFC[sign(table_DEG_all$logFC)<0]))
table_DEG_all$logFC[sign(table_DEG_all$logFC)>0]<-
log2(table_DEG_all$logFC[sign(table_DEG_all$logFC)>0])
EnhancedVolcano(table_DEG_all,
lab = rownames(table_DEG_all),
# selectLab = genes_int,
labCol = 'black',
labFace = 'bold',
x = "logFC",
y = "adj.P.Val",
pCutoff = 0.05,
FCcutoff = 1,
ylim = c(0,5),
# xlim = c(-3,3),
pointSize = 1,
labSize = 2,
colAlpha = 2,
legendPosition = 'top',
legendLabSize = 8,
legendIconSize = 4.0,
drawConnectors = T,
widthConnectors = 0.75,
boxedLabels = T,
title = "DE proteins ",
subtitle = "Data calculated with limma (log2 after limma)")data_AC_deg<-
data_AC %>%
filter(adj.P.Val<=0.05) %>%
dplyr::select(Accession)
table_DEG_all_deg<-
rownames(table_DEG_all %>%
filter(adj.P.Val<=0.05) )
# List of items
x <- list(Precalculated = data_AC_deg$Accession, Limma = table_DEG_all_deg)
# 2D Venn diagram
ggVennDiagram(x,set_color ="red")+
scale_fill_gradient2()+
scale_color_manual(values = c("black","black"))datExpr0_converted<-datExpr0
datExpr0_converted[datExpr0_converted==0]<-0.1
datExpr0_l<-log2(datExpr0_converted)
groups<-as.factor(c(rep("ap",3),rep("PL",3)))
f = factor(groups)
design = model.matrix(~ 0 + f)
colnames(design) = gsub("f","",colnames(design))
data.fit = lmFit(t(datExpr0_l),design)
contrast.matrix = makeContrasts(ap-PL,levels=design)
data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)
table_DEG_all <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr")
table_DEG_with_0 <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr",p.value = 0.05)
table_DEG_all<-
table_DEG_all %>%
mutate(celltype1=ifelse(logFC< (-30),"Negatius",ifelse(logFC>30,"Positius","NO")))
celltype1<-rownames(table_DEG_all)[table_DEG_all$celltype1=="Negatius"]
celltype2<-rownames(table_DEG_all)[table_DEG_all$celltype1=="Positius"]
EnhancedVolcano(table_DEG_all,
# encircle = c(celltype1),encircleAlpha = 0.5,
# shade = celltype2,
# shadeBins = 10,
# shadeSize=1000,
lab = rownames(table_DEG_all),
# selectLab = genes_int,
labCol = 'black',
labFace = 'bold',
x = "logFC",
y = "adj.P.Val",
pCutoff = 0.05,
FCcutoff = 1,
# ylim = c(0,7),
# xlim = c(-3,3),
pointSize = 1,
labSize = 2,
colAlpha = 2,
legendPosition = 'top',
legendLabSize = 8,
legendIconSize = 4.0,
drawConnectors = T,
widthConnectors = 0.75,
boxedLabels = T,
title = "DE proteins",
subtitle = "data log2 wiht 0") data_abundance_complete<-data_abundance[complete.cases(data_abundance),]
datExprcomplete = as.data.frame(data_abundance_complete)
rownames(datExprcomplete) = datExprcomplete$Row.names
datExprcomplete<-datExprcomplete[,-1]
datExprcomplete<-t(datExprcomplete)
datExpr0_converted<-datExprcomplete
datExpr0_l<-log2(datExpr0_converted)
groups<-as.factor(c(rep("ap",3),rep("PL",3)))
f = factor(groups)
design = model.matrix(~ 0 + f)
colnames(design) = gsub("f","",colnames(design))
data.fit = lmFit(t(datExpr0_l),design)
contrast.matrix = makeContrasts(ap-PL,levels=design)
colnames(contrast.matrix)## [1] "ap - PL"
data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)
table_DEG_all <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr")
table_DEG_complete <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr",p.value = 0.05)
EnhancedVolcano(table_DEG_all,
lab = rownames(table_DEG_all),
# selectLab = genes_int,
labCol = 'black',
labFace = 'bold',
x = "logFC",
y = "adj.P.Val",
pCutoff = 0.05,
FCcutoff = 1,
ylim = c(0,6),
# xlim = c(-3,3),
pointSize = 1,
labSize = 7,
colAlpha = 2,
legendPosition = 'top',
legendLabSize = 8,
legendIconSize = 4.0,
drawConnectors = T,
widthConnectors = 0.75,
boxedLabels = T,
title = "DE proteins",
subtitle = "data two excels wihtout 0") data_AC_deg<-
data_AC %>%
filter(adj.P.Val<=0.05) %>%
dplyr::select(Accession)
table_DEG_all_deg<-
rownames(table_DEG_all %>%
filter(adj.P.Val<=0.05) )
# List of items
x <- list(Complete_cases = rownames(table_DEG_complete), With_0 = rownames(table_DEG_with_0))
# 2D Venn diagram
ggVennDiagram(x,set_color ="red")+
# scale_fill_gradient2()+
scale_color_manual(values = c("black","black"))x <- list(Complete_cases = rownames(table_DEG_complete), With_0 = rownames(table_DEG_with_0),Precalculated = data_AC_deg$Accession, Limma = table_DEG_all_deg)
ggVennDiagram(x,set_color ="red")+
scale_fill_gradient2()data_AC_sig<-
data_AC %>%
filter(adj.P.Val<=0.05)
gene<-data_AC_sig$logFC
names(gene)<-data_AC_sig$Accession
kegg<-enrichKEGG(names(gene),
organism = "hsa",
keyType = "uniprot",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
qvalueCutoff = 0.2,
use_internal_data = FALSE)
kegg_sig<-
kegg@result %>%
filter(p.adjust<=0.05)
datatable_jm(kegg_sig,"geneID")go <- enrichGO(gene = names(gene),
OrgDb = org.Hs.eg.db,
ont = "BP",
keyType = "UNIPROT",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
go_sig<-
go@result %>%
filter(p.adjust<=0.05)
datatable_jm(go_sig,"geneID")gene<-data_AC$logFC
names(gene)<-data_AC$Accession
gene_list<-sort(gene,decreasing = T)
set.seed(123)
go_gsea <- gseGO(geneList=gene_list,
ont ="BP",
keyType = "UNIPROT",
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = org.Hs.eg.db,
pAdjustMethod = "BH")
go_gsea_sig<- go_gsea@result %>%
filter(p.adjust<0.05)
datatable_jm(go_gsea_sig,"core_enrichment")kegg_gsea <- gseKEGG(geneList = gene_list,
organism = "hsa",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
keyType = "uniprot")
kegg_gsea_sig<- kegg_gsea@result %>%
filter(p.adjust<0.05)
datatable_jm(kegg_gsea_sig,"core_enrichment")